# -*- coding:utf8 -*-
# !/usr/bin/env python

'''
National Enterprise Credit Information Publicity System (Beijing).
Maintainer: 黄羽 (Huang Yu)
'''

import re
from scpy.logger import get_logger
from table import index, report_index, table_clean, parse_time, time_clean, money_notclean
import copy
from bj_get_base_info import base_info_run
from bj_get_year_report import year_report_run
import sys
# import bj_parse

# Python 2 idiom: reload(sys) makes sys.setdefaultencoding reachable again so
# the process-wide default codec for implicit str/unicode conversion can be
# switched to UTF-8. This affects every module in the process.
reload(sys)
sys.setdefaultencoding('utf8')

# Module-level logger obtained from the project's scpy.logger helper.
logger = get_logger(__file__)


def extract_share_holder_list(share_holder_table):
    """
    Parse a shareholder-information HTML table into a list of dicts.

    All ``<tr ...>`` tags are normalised to ``<tr>`` and every space, CR, tab
    and LF is removed before rows are extracted. The first two rows and the
    last row are skipped; every remaining row must carry at least four
    ``<td>`` cells, of which only the first two are used.

    :param share_holder_table: HTML source of the shareholder table
    :return: list of shareholder dicts, or ``[]`` when the table has three
             rows or fewer
    :raises Exception: when a data row yields fewer than four cells
    """
    def _strip_blanks(text):
        # Drop every space, carriage return, tab and newline.
        for blank in (' ', '\r', '\t', '\n'):
            text = text.replace(blank, '')
        return text

    normalised = _strip_blanks(re.sub('<tr.*?>', '<tr>', share_holder_table))
    rows = re.findall('<tr>.*?</tr>', normalised)
    if len(rows) <= 3:
        return []

    holders = []
    for row in rows[2:-1]:
        # Bare <td> tags are hidden behind a placeholder so that the
        # attribute-stripping pattern '<td.+?>' cannot swallow a whole
        # '<td>...</td>' cell; they are restored right afterwards.
        row = re.sub('<td.+?>', '<td>', row.replace('<td>', '@@@@@'))
        row = _strip_blanks(row.replace('@@@@@', '<td>'))
        cells = re.findall("<td>(.*?)</td>", row)
        if len(cells) < 4:
            raise Exception("添加网页解析方法！")
        holders.append({"shareholderType": cells[0], "shareholderName": cells[1],
                        "regCapCur": "", "country": "", "fundedRatio": "", "subConam": "", "conDate": ""})
    return holders


def base_td(base_table, world):
    """
    Return the cleaned text of the value cell that follows a label cell.

    The ``<td>`` cells of ``base_table`` are treated as alternating
    label/value pairs (cells 0, 2, 4, ... are labels; 1, 3, 5, ... are
    values). The first label cell containing ``world`` as a substring selects
    its value cell, whose inner HTML is stripped of ``&nbsp;``, whitespace
    and any remaining tags.

    :param base_table: HTML containing the label/value ``<td>`` cells
    :param world: label text to search for (substring match)
    :return: cleaned value text, or ``""`` when no label matches or the
             matching label has no value cell after it
    """
    td_list = re.findall("(<td.*?</td>)", base_table, re.S)
    # zip() stops at the shorter sequence, so a trailing label cell without a
    # value cell is ignored instead of raising IndexError.
    for label_cell, value_cell in zip(td_list[::2], td_list[1::2]):
        if world not in label_cell:
            continue
        inner = re.findall("<td.*?>(.*?)</td>", value_cell, re.S)
        if not inner:
            continue
        text = inner[0].replace('&nbsp;', '')
        for blank in ('\t', '\r', '\n', ' '):
            text = text.replace(blank, '')
        # Remove any markup nested inside the value cell.
        return re.sub('<.+?>', '', text)
    return ""


def base(base_table):
    """
    Build the basic registration record from the basic-information table.

    Each field is looked up by its Chinese label through ``base_td``. Date
    fields are passed through ``time_clean`` and the registered capital
    through ``money_notclean`` (both imported from ``table``). Several fields
    fall back to an alternative label when the primary one yields nothing.

    :param base_table: HTML of the basic-information table
    :return: a one-element list holding the record dict
    """
    def cell(label):
        return base_td(base_table, label)

    def first_cell(*labels):
        # Text of the first label that yields a non-empty value; otherwise
        # the result of the last lookup.
        value = ""
        for label in labels:
            value = cell(label)
            if value:
                break
        return value

    def first_date(*labels):
        # Same fallback rule as first_cell, applied after time_clean().
        value = ""
        for label in labels:
            value = time_clean(cell(label))
            if value:
                break
        return value

    # The registration number is published under one of three headings.
    reg_label = '注册号'
    for candidate in ('统一社会信用代码/注册号', '注册号/统一社会信用代码'):
        if candidate in base_table:
            reg_label = candidate
            break
    regNo = cell(reg_label)

    enterpriseName = cell('名称')
    frName = first_cell('法定代表人', '负责人')

    has_capital = '注册资本' in base_table
    raw_capital = cell('注册资本') if has_capital else ''
    regCap = money_notclean(raw_capital) if has_capital else ''

    regCapCur = cell('注册币种')
    if regCap and not regCapCur:
        # No explicit currency cell: infer it from the raw capital text.
        regCapCur = '美元' if '美' in raw_capital else '人民币'

    record = {
        "regNo": regNo, "enterpriseName": enterpriseName, "frName": frName, "regCap": regCap,
        "regCapCur": regCapCur,
        "esDate": first_date('成立日期'),
        "openFrom": first_date('经营期限自', '营业期限自'),
        "openTo": first_date('经营期限至', '营业期限至'),
        "enterpriseType": cell('类型'),
        "auditDate": first_date('核准日期'),
        "enterpriseStatus": cell('登记状态'),
        "cancelDate": first_date('注销日期'),
        "revokeDate": first_date('吊销日期'),
        "address": first_cell('住所', '营业场所'),
        "abuItem": cell('许可经营项目'),
        "cbuItem": cell('一般经营项目'),
        "operateScope": cell('经营范围'),
        "operateScopeAndForm": cell('经营(业务)范围及方式'),
        "regOrg": cell('登记机关'),
        "ancheYear": first_date('最后年检年度'),
        "ancheDate": first_date('最后年检日期'),
        "industryPhyCode": cell('行业门类代码'),
        "industryPhyName": cell('行业门类名称'),
        "industryCode": cell('国民经济行业代码'),
        "industryName": cell('国民经济行业名称'),
        "recCap": cell('实收资本'),
        "oriRegNo": cell('原注册号'),
    }
    return [record]


def share_holder(share_table):
    """
    Parse the shareholder table into a list of shareholder dicts.

    The first row and the last two rows are skipped. Only rows with exactly
    nine ``<td>`` cells are used; ``&nbsp;`` and spaces are removed from each
    cell first.

    :param share_table: HTML of the shareholder table
    :return: list of shareholder dicts; ``[]`` when the table has three rows
             or fewer
    """
    rows = re.findall("(<tr.*?</tr>)", share_table, re.S)
    holders = []
    if len(rows) <= 3:
        return holders

    for row in rows[1:-2]:
        cells = [raw.replace("&nbsp;", "").replace(" ", "")
                 for raw in re.findall("<td.*?>(.*?)</td>", row, re.S)]
        if len(cells) != 9:
            continue
        # NOTE(review): float() raises ValueError if cell 3 is blank or
        # non-numeric -- confirm the site always fills that column.
        holders.append({
            'shareholderName': cells[1], 'shareholderType': cells[2], 'country': '',
            'subConam': float(cells[3]), 'regCapCur': '', 'conDate': time_clean(cells[5]),
            'fundedRatio': '', })
    return holders


def person(person_table):
    """
    Parse the key-personnel table into a list of person dicts.

    The first and last rows are skipped. Only rows with exactly four ``<td>``
    cells are used; cells 1-3 become ``name``, ``position`` and ``sex`` after
    ``&nbsp;`` and spaces are removed.

    :param person_table: HTML of the personnel table
    :return: list of person dicts; ``[]`` when the table has two rows or fewer
    """
    rows = re.findall("<tr.*?tr>", person_table, re.S)
    people = []
    if len(rows) <= 2:
        return people

    for row in rows[1:-1]:
        cells = [raw.replace("&nbsp;", "").replace(" ", "")
                 for raw in re.findall("<td.*?>(.*?)</td>", row, re.S)]
        if len(cells) != 4:
            continue
        people.append({"name": cells[1], "position": cells[2], "sex": cells[3]})
    return people


def alter(alter_table_list):
    """
    Parse change-record HTML fragments into a list of change dicts.

    Each fragment is expected to hold at least two ``<table>`` elements. In
    the first (title) table, cell 1 is taken as the change date and cell 3 as
    the changed item. In the second (content) table, cell 0 is taken as the
    value before the change and cell 1 as the value after it.
    NOTE(review): the content-table cell positions are inferred from the
    ``len(content) >= 2`` guard -- confirm against a live page.

    Fragments that do not match the layout still produce an entry whose
    fields are empty strings.

    :param alter_table_list: iterable of HTML fragments, one per change record
    :return: list of dicts with keys ``altItem``, ``altBe``, ``altAf``,
             ``altDate``
    """
    alterList = []
    for alter_table in alter_table_list:
        alter_item = {"altItem": "", "altBe": "", "altAf": "", "altDate": ""}
        tables = re.findall("<table.*?table>", alter_table, re.S)
        if len(tables) >= 2:
            titles = re.findall("<td.*?>(.*?)</td>", tables[0], re.S)
            if len(titles) >= 4:
                alter_item["altDate"] = time_clean(titles[1])
                alter_item["altItem"] = titles[3]

            content = re.findall("<td.*?>(.*?)</td>", tables[1], re.S)
            if len(content) >= 2:
                # Before/after values come from the content table. The
                # previous code read them from the title cells, which
                # duplicated the date and could raise IndexError.
                alter_item["altBe"] = content[0]
                alter_item["altAf"] = content[1]
        alterList.append(alter_item)
    return alterList


def extract_base(raw_base_html):
    """
    Parse the raw registration pages into the structured result dict.

    ``raw_base_html`` is indexed with the keys ``"base"``, ``"shareHolder"``,
    ``"alter"`` and ``"member"``. Only ``basicList``, ``shareHolderList``,
    ``alterList`` and ``personList`` are filled in; every other list in the
    result stays empty.

    :param raw_base_html: mapping of section name to HTML source
    :return: the result dict, or ``None`` when ``raw_base_html`` is empty
    """
    if not raw_base_html:
        return None

    res_dict = copy.deepcopy({
        'province': 'bj',
        'basicList': [],
        'shareHolderList': [],
        'personList': [],
        'punishBreakList': [],
        'alidebtList': [],
        'entinvItemList': [],
        'frinvList': [],
        'frPositionList': [],
        'alterList': [],
        'filiationList': [],
        'caseInfoList': [],
        'sharesFrostList': [],
        'sharesImpawnList': [],
        'morDetailList': [],
        'morguaInfoList': [],
        'liquidationList': [],
        'checkMessage': [],
        "abnormalOperation": [],
    })

    def fill(source_key, marker, parser, target_key):
        # Locate the section's table via table_clean(); store the parsed rows
        # only when a table was found.
        table = table_clean(raw_base_html[source_key], marker)
        if table:
            res_dict[target_key] = parser(table)

    fill("base", "名称", base, 'basicList')
    fill("shareHolder", "投资", share_holder, 'shareHolderList')
    res_dict['alterList'] = alter(raw_base_html["alter"])
    fill("member", "序号", person, 'personList')

    # NOTE: branch offices (filiationList, "分支机构信息"), liquidation
    # (liquidationList, "清算信息"), abnormal operation (abnormalOperation,
    # "经营异常信息") and spot checks (checkMessage, "抽查检查信息") are not
    # parsed here; the previous table_clean()/index() based code for them
    # was disabled.

    return res_dict


def extract_year_report(year_report_html_list):
    """
    Parse annual-report pages into a list of report dicts.

    For every ``(year, html)`` tuple, each known report section is located
    with ``table_clean`` and, when present, parsed with ``report_index``.
    Sections that are not found keep their empty default value.

    :param year_report_html_list: list of ``(year, html)`` tuples
    :return: list of report dicts, each with an added ``year`` key, or
             ``None`` when the input is empty
    :raises Exception: when an item is not a 2-tuple
    """
    if not year_report_html_list:
        return None

    template = {
        'baseInfo': {},
        'website': {},
        'investorInformations': [],
        'assetsInfo': {},
        'equityChangeInformations': [],
        'changeRecords': [],
    }
    # (result key, section title on the page), in parsing order.
    sections = (
        ('baseInfo', '企业基本信息'),
        ('website', '网站或网店信息'),
        ('assetsInfo', '企业资产状况信息'),
        ('investorInformations', '股东及出资信息'),
        ('equityChangeInformations', '股权变更信息'),
        ('changeRecords', '修改记录'),
    )

    reports = []
    for entry in year_report_html_list:
        if not (isinstance(entry, tuple) and len(entry) == 2):
            raise Exception("'year_report_tuple' type error!")
        year, html = entry[0], entry[1]

        # Deep copy so the nested default containers are never shared.
        report = copy.deepcopy(template)
        for key, title in sections:
            section_table = table_clean(html, title)
            if section_table:
                report[key] = report_index(title, section_table)
        report['year'] = year
        reports.append(report)

    return reports


def get_asic(companyName):
    """
    Fetch the registration pages for a company and parse them.

    :param companyName: company name or registration number
    :return: ``None`` when ``base_info_run`` returns ``None``; otherwise a
             tuple ``(raw_html, parsed_dict)``
    :raises Exception: when ``base_info_run`` returns an empty value other
                       than ``None``
    """
    html_res = base_info_run(companyName)
    if html_res is None:
        return None
    if not html_res:
        raise Exception("错误！")
    return html_res, extract_base(html_res)


def get_year_report(companyName):
    """
    Fetch the annual-report pages for a company and parse them.

    :param companyName: company name or registration number
    :return: ``None`` when ``year_report_run`` returns ``None``; ``[]`` when
             it returns an empty list; otherwise a tuple
             ``(raw_html_list, parsed_list)`` where ``raw_html_list`` is a
             list of ``{year: html}`` dicts built from the ``(year, html)``
             tuples
    :raises Exception: when ``year_report_run`` returns anything that is
                       neither ``None`` nor a list
    """
    year_report_html_list = year_report_run(companyName)
    if year_report_html_list is None:
        return None
    if not isinstance(year_report_html_list, list):
        raise Exception("错误！")
    if not year_report_html_list:
        # The former check `x is []` compared identity with a fresh list and
        # was never true, so an empty result raised instead of returning [].
        return []

    year_report_list = extract_year_report(year_report_html_list)
    year_report_raw_html_list = [{item[0]: item[1]} for item in year_report_html_list]
    return year_report_raw_html_list, year_report_list


def search2(companyName):
    """
    Combine the registration data and the annual reports for a company.

    :param companyName: company name or registration number
    :return: ``None`` when either ``get_asic`` or ``get_year_report`` returns
             ``None``; otherwise a tuple ``(raw_html_dict, res_dict)`` where
             ``raw_html_dict`` holds the raw pages and ``res_dict`` the parsed
             data with the annual reports under ``yearReportList``
    :raises Exception: when ``get_year_report`` returns an unexpected value
    """
    asic_tuple = get_asic(companyName)
    if asic_tuple is None:
        return None

    year_report_tuple = get_year_report(companyName)
    if year_report_tuple is None:
        return None
    if isinstance(year_report_tuple, list) and not year_report_tuple:
        # Empty list from get_year_report: there are no annual reports.
        # (`is []` was never true, and indexing [] below would have raised
        # IndexError.)
        year_report_html_list = []
        year_report_list = []
    elif year_report_tuple and isinstance(year_report_tuple, tuple):
        year_report_html_list = year_report_tuple[0]
        year_report_list = year_report_tuple[1]
    else:
        raise Exception("错误！")

    asic_raw_html = asic_tuple[0]
    res_dict = asic_tuple[1]

    # basicList stays empty when no basic-information table was found, so
    # guard the [0] access.
    basic_list = res_dict['basicList']
    enterprise_name = basic_list[0].get('enterpriseName', '') if basic_list else ''

    raw_html_dict = {
        'province': 'bj',
        'type': '0',
        'html': asic_raw_html,
        'yearList': year_report_html_list,
        'keyword': companyName,
        'companyName': enterprise_name,
        'json': '',
    }

    res_dict['yearReportList'] = year_report_list

    return raw_html_dict, res_dict


def search(companyName):
    """
    Entry point for the real-time crawler: return only the parsed data.

    :param companyName: company name or registration number
    :return: ``None`` when ``search2`` returns ``None``; otherwise the second
             element of the tuple it returns
    :raises Exception: when ``search2`` returns anything other than ``None``
                       or a 2-tuple
    """
    result = search2(companyName)
    if result is None:
        return None
    if not (result and isinstance(result, tuple) and len(result) == 2):
        raise Exception("错误!")
    return result[1]


if __name__ == "__main__":
    # Manual smoke test: run one lookup and dump the result as JSON.
    # Other keywords used during development:
    #   '北京百度糯米信息技术有限公司', '110000450203508',
    #   '北京金顺蝶科技有限公司', '北京大麦佳居商贸有限责任公司',
    #   '110000450203507'
    # get_year_report(companyName) and search(companyName) can be swapped in
    # for search2() to exercise the other entry points.
    companyName = '北京天下神威科技有限公司'
    res = search2(companyName)
    import json

    print(json.dumps(res, indent=4, ensure_ascii=False))